suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(readxl))
suppressPackageStartupMessages(library(hablar))
suppressPackageStartupMessages(library(tsibble))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(DT))
suppressPackageStartupMessages(library(ggplot2))
moduletwo <- read_excel( "~/Desktop/thesis_surveydata/political_factors_complete.xlsx")
save(moduletwo, file = "moduletwo.rda")
module2 <- data.frame(moduletwo)
module2 %>%
filter(!row_number() %in% c(1,2))
# loaded data set for module 2 eco -pol variables and removes the rows with question number and question text
#This chunk contains the coding schemes of various scales used in survey one: eco-political factors, kahan scale and acceptance scale
codedmodule2 <- module2 %>%
#remove row 1
filter(!row_number() %in% c(1,2)) %>%
# replace risky likert scale with numbers
mutate_at(vars(starts_with("Risky")), funs(case_when(. =="Not at all risky" ~ 1,
. =="Slightly risky" ~ 2,
. =="Moderately risky" ~ 3,
. =="Very risky" ~ 4,
. =="Extremely risky" ~ 5))) %>%
# replace beneficial likert scale with numbers
mutate_at(vars(starts_with("Ben")), funs(case_when(. =="Not at all beneficial" ~ 1,
. =="Slightly beneficial" ~ 2,
. =="Moderately beneficial" ~ 3,
. =="Very beneficial" ~ 4,
. =="Extremely beneficial" ~ 5 ))) %>%
# replace nuclear acceptance likert scale with numbers
mutate_at(vars(N_accept,N_reluctantlyaccept,N_reject), funs(case_when(. == "Strongly disagree" ~ 1,
. == "Somewhat disagree" ~ 2,
. == "Neither agree nor disagree" ~3,
. == "Somewhat agree" ~ 4,
. == "Strongly agree" ~ 5))) %>%
# code likert scale for variables for Kahan scale into numbers
mutate_at(vars(starts_with (c("K_I","K_H","DISPLACE", "POLLUTE", "HEALTH", "JOBS", "BEAUTY", "PRIDE", "NPRIDE", "DEV", "PROSPER", "RELY"))), funs(case_when(. == "Strongly disagree" ~ 1,
. == "Somewhat disagree" ~ 2,
. == "Neither agree nor disagree" ~3,
. == "Somewhat agree" ~ 4,
. == "Strongly agree" ~ 5))) %>%
# reverse code for likert scale for variables for Kahan scale into numbers
mutate_at(vars(starts_with (c("K_S","K_E"))), funs(case_when(. == "Strongly disagree" ~ 5,
. == "Somewhat disagree" ~ 4,
. == "Neither agree nor disagree" ~3,
. == "Somewhat agree" ~ 2,
. == "Strongly agree" ~ 1))) %>%
# code eco-pol scale variables into numbers
mutate_at(vars(SYSTEMDEMO,SYSTEMRELIGION,SYSTEMTECHNO,SYSTEMTOTAL,WEALTHLIM,MECHANISATION,DECISIONDECEN,INDUSTRYLARGE,ECONOMYGLOBAL,OWNERPVT,OWNERNOREG), funs(case_when(. == "Strongly disagree" ~ 1,
. == "Somewhat disagree" ~ 2,
. == "Neither agree nor disagree" ~3,
. == "Somewhat agree" ~ 4,
. == "Strongly agree" ~ 5))) %>%
# reverse code eco-pol scale variables into numbers
mutate_at(vars(DECISIONCEN, INDUSTRYSMALL, ECONOMYLOCAL, ENVOVERDEV,OWNERPUB, OWNERREG), funs(case_when(. == "Strongly disagree" ~ 5,
. == "Somewhat disagree" ~ 4,
. == "Neither agree nor disagree" ~3,
. == "Somewhat agree" ~ 2,
. == "Strongly agree" ~ 1)))
## Warning: `funs()` was deprecated in dplyr 0.8.0.
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
codedmodule2
# summary statistics for risks from different energy sources
risksummary <- codedmodule2 %>%
summarize_at(vars(starts_with(c("RISKY"))), list(~mean(., na.rm = TRUE), ~median(., na.rm = TRUE), ~sd(.,na.rm = TRUE),~var(.,na.rm = TRUE), ~n())) %>%
#add row index so later spreading indexed correctly
add_rownames()%>%
#melt to long format
gather(technology, value, -rowname) %>%
# separate risky from variable suffix
separate(technology, c("Risky", "var"), extra = "merge", fill = "left") %>%
#separate mean from variable prefix
separate(var, c("technology", "summary")) %>%
# spread summary values back to wide form
spread(summary,value) %>%
#clean up
select(-rowname, -Risky) %>%
arrange(mean)%>%
setNames(paste0('perceivedrisk.', names(.)))%>%
rename(technology = perceivedrisk.technology)
## Warning: `add_rownames()` was deprecated in dplyr 1.0.0.
## Please use `tibble::rownames_to_column()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
risksummary
#plotting perceived risk from different technologies
module2%>%
filter(!row_number() %in% c(1,2)) %>%
select(starts_with("RISKY"))%>%
gather(Technology, Perceived_Risk, Risky_Hydro:Risky_INDLPG, factor_key = TRUE)%>%
separate(Technology, c("Risky", "Technology"), extra = "merge", fill = "left")%>%
select(-Risky) %>%
group_by(Technology, Perceived_Risk) %>%
summarise(n=n())
## `summarise()` has grouped output by 'Technology'. You can override using the
## `.groups` argument.
colorsequence<- c("#8C4646","#D9695F","#F2A679","#F2D091","#5D8C7B","#808080")
df_barplot<-module2%>%
filter(!row_number() %in% c(1,2)) %>%
select(starts_with("RISKY"))%>%
gather(Technology, Perceived_Risk, Risky_Hydro:Risky_INDLPG, factor_key = TRUE)%>%
separate(Technology, c("Risky", "Technology"), extra = "merge", fill = "left")%>%
select(-Risky) %>%
group_by(Technology, Perceived_Risk) %>%
summarise(n=n())
## `summarise()` has grouped output by 'Technology'. You can override using the
## `.groups` argument.
df_barplot %>% ggplot(aes(fill= fct_rev(factor(Perceived_Risk, levels=c("Extremely risky", "Very risky","Moderately risky","Slightly risky","Not at all risky","Rather not say/Don't know"))), y=n , x=Technology)) +
geom_bar(position="fill",stat="identity")+
scale_fill_manual("legend", values = c("Extremely risky" = "#8C4646", "Very risky" = "#D9695F", "Moderately risky" = "#F2A679", "Slightly risky" = "#F2D091","Not at all risky" = "#5D8C7B", "Rather not say/Don't know" = "#808080"))+
coord_flip()+
theme_classic()

# fix ugly visual. Rearrange from extremely risky to not at all risky. Red to green, rather not say/Don't know should be grey. Also facet wrap by INDividually owned and centrally owned technology.
# summary statistics for benefits from different energy sources
benefitsummary <- codedmodule2 %>%
summarize_at(vars(starts_with(c("Ben"))), list(~mean(., na.rm = TRUE), ~median(., na.rm = TRUE), ~sd(.,na.rm = TRUE),~var(.,na.rm = TRUE), ~n())) %>%
#add row index so later spreading indexed correctly
add_rownames()%>%
#melt to long format
gather(technology, value, -rowname) %>%
# separate Ben from variable suffix
separate(technology, c("Ben", "var"), extra = "merge", fill = "left") %>%
#separate mean from variable prefix
separate(var, c("technology", "summary")) %>%
# spread summary values back to wide form
spread(summary,value) %>%
#clean up
select(-rowname, -Ben) %>%
arrange(mean)%>%
setNames(paste0('perceivedbenefit.', names(.))) %>%
rename(technology = perceivedbenefit.technology)
benefitsummary
#round off to two decimal places
colorsequence<- c("#8C4646","#D9695F","#F2A679","#F2D091","#5D8C7B","#808080")
df_barplotben <- module2%>%
filter(!row_number() %in% c(1,2)) %>%
select(starts_with("Ben"))%>%
gather(Technology, Perceived_Benefit, Ben_Hydro:Ben_INDLPG, factor_key = TRUE)%>%
separate(Technology, c("Ben", "Technology"), extra = "merge", fill = "left")%>%
select(-Ben) %>%
group_by(Technology, Perceived_Benefit) %>%
summarise(n=n())
## `summarise()` has grouped output by 'Technology'. You can override using the
## `.groups` argument.
df_barplotben %>% ggplot(aes(fill= factor(Perceived_Benefit, levels=c("Extremely beneficial", "Very beneficial","Moderately beneficial","Slightly beneficial","Not at all beneficial","Rather not say/Don't know")), y=n , x=Technology)) +
geom_bar(position="fill",stat="identity")+
scale_fill_manual("legend", values = c("Extremely beneficial" = "#5D8C7B", "Very beneficial" = "#F2D091", "Moderately beneficial" = "#F2A679", "Slightly beneficial" = "#D9695F","Not at all beneficial" = "#8C4646", "Rather not say/Don't know" = "#808080"))+
coord_flip()+
theme_classic()

# fix ugly visual. Rearrange from extremely beneficial to not at all beneficial. Green to red, rather not say/Don't know should be grey. Also facet wrap by INDividually owned and centrally owned technology. Also arrange descending from most beneficial to least beneficial.
#summary statistics for nuclear energy accept, reject and reluctantly accept scale
codedmodule2 %>%
summarize_at(vars(N_accept,N_reject,N_reluctantlyaccept), list(~mean(., na.rm = TRUE),~median(., na.rm = TRUE), ~sd(.,na.rm = TRUE),~var(.,na.rm = TRUE),~n())) %>%
add_rownames() %>%
gather(acceptance, value, -rowname) %>%
separate(acceptance, c("N_accept", "var") , extra = "merge", fill = "left") %>%
separate(var, c("Acceptance", "summary")) %>%
spread(summary, value) %>%
select(-rowname, -N_accept)%>%
arrange(mean)
# chi square test for risk perception from nuclear energy and gender, race
# first combine gender and caste variables
chitest <- codedmodule2 %>%
select(Risky_Nuclear,Risky_Hydro, Risky_Hydro, Risky_Solar,Urban_Rural, Gender, Caste) %>%
na.omit() %>%
filter(!Caste=="Rather not say/Don't know")%>%
unite(gendercaste, Gender, Caste, sep = "_", remove = FALSE)
chitest
full_join(risksummary,benefitsummary, by = NULL, copy = FALSE, keep = FALSE) %>%
select(technology, perceivedrisk.mean, perceivedbenefit.mean) %>%
mutate(technology = fct_reorder(technology, perceivedrisk.mean, .desc = TRUE))%>%
gather(key = "level", value = "mean",
perceivedrisk.mean, perceivedbenefit.mean) %>%
ggplot(aes(x=technology, y= mean, fill = level, ))+
geom_bar(stat="identity", position=position_dodge())+
ylim(0,4)+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
scale_fill_manual("legend", values = c("perceivedbenefit.mean" = "palegreen3", "perceivedrisk.mean" = "firebrick2"))
## Joining, by = "technology"

# arrange descending and facet wrap by IND prefix
full_join(risksummary,benefitsummary, by = NULL, copy = FALSE, keep = FALSE) %>%
select(technology, perceivedrisk.mean, perceivedbenefit.mean) %>%
mutate(technology = fct_reorder(technology, perceivedbenefit.mean, .desc = TRUE))%>%
gather(key = "level", value = "mean",
perceivedrisk.mean, perceivedbenefit.mean) %>%
ggplot(aes(x=technology, y= mean, fill = level, ))+
geom_bar(stat="identity", position=position_dodge())+
ylim(0,4)+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
scale_fill_manual("legend", values = c("perceivedbenefit.mean" = "palegreen3", "perceivedrisk.mean" = "firebrick2"))
## Joining, by = "technology"

full_join(risksummary,benefitsummary, by = NULL, copy = FALSE, keep = FALSE) %>%
select(technology, perceivedrisk.median, perceivedbenefit.median) %>%
mutate(technology = fct_reorder(technology, perceivedrisk.median, .desc = TRUE))%>%
gather(key = "level", value = "median",
perceivedrisk.median, perceivedbenefit.median) %>%
ggplot(aes(x=technology, y= median, fill = level, ))+
geom_bar(stat="identity", position=position_dodge())+
ylim(0,4)+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
scale_fill_manual("legend", values = c("perceivedbenefit.median" = "palegreen3", "perceivedrisk.median" = "firebrick2"))
## Joining, by = "technology"

# arrange descending and facet wrap by IND prefix
# arrange descending and facet wrap by IND prefix
#median score for Kahan invidualism scale
codedmodule2 %>%
select((starts_with(c("K_I", "K_S"))))%>%
gather(key = "K_I", value = "score")%>%
na.omit()%>%
summarise(median = median(c(score)))
# Kahan scale median score for Hierarchy scale
codedmodule2 %>%
select((starts_with(c("K_H", "K_E"))))%>%
gather(key = "K_H", value = "score")%>%
na.omit()%>%
summarise(median = median(c(score)))
#calculating individualism score for each respondent
Kscalescores <- codedmodule2 %>%
select(starts_with(c("Risky","K_I","K_S", "K_H", "K_E")))%>%
rowwise()%>%
na.omit()%>%
mutate(Individualism_score = mean(c_across(K_IINTRFER:K_SPROTECT)))%>%
mutate(Hierarchy_score = mean(c_across(K_HEQUAL:K_ERADEQ2)))%>%
select(!starts_with(c("K_I","K_S", "K_H", "K_E")))
Kscalescores
#scatter plot of Kahan scale scores around the median scores on Individualism and Hierarchy scales.
Kscalescores %>%
ggplot(aes(Individualism_score, Hierarchy_score))+
geom_point()+
geom_hline(yintercept=2, colour="black", lwd=1)+
geom_vline(xintercept=3, colour="black", lwd=1)

# checking distribution of scores on Individualism scale
Kscalescores %>%
ggplot(aes(x=Individualism_score, y = ..count..), fill = "lightgray")+
geom_density()

# checking distribution of scores on Hierarchy scale
Kscalescores %>%
ggplot(aes(x=Hierarchy_score, y = ..count..), fill = "lightgray")+
geom_density()

chisq.test(chitest$Risky_Nuclear, chitest$Urban_Rural)
##
## Pearson's Chi-squared test
##
## data: chitest$Risky_Nuclear and chitest$Urban_Rural
## X-squared = 16.389, df = 4, p-value = 0.002539